Intereactive Plots - STARTER

# load packages
library(tidyverse)
library(gapminder)
library(plotly)
library(corrplot)
library(GGally)
library(crosstalk)
library(DT)
library(ggforce)

ggplotly()

Building simple interactive plots

ggplot(data = diamonds,
       aes(x = cut)) + 
  geom_bar()

Interactivity for other plots

p <- ggplot(data = diamonds,
            aes(x = cut,
                fill = clarity)) + 
  geom_bar(position = "dodge")
ggplotly(p)
p <- ggplot(data = diamonds,
            aes(x = price)) + 
  geom_histogram() + 
  facet_grid(cut ~ .,
             scales = "free_y")
ggplotly(p)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
p <- ggplot(data = diamonds,
       aes(x = cut,
           y = price)) + 
  geom_boxplot()
ggplotly(p)

Exercise

  • Using the code chunk below, create a proportionally stacked bar chart of price by clarity using the diamonds dataset, then add interactivity. Does all of the interactivity features work well with this plot type?

New plots with interactivity

p <- ggplot(data = diamonds,
       aes(x = log(carat),
           y = log(price))) + 
  geom_point()
ggplotly(p)
p <- ggplot(data = diamonds,
            aes(x = log(carat),
                y = log(price))) + 
  geom_point()
ggplotly(p)

Application

?gapminder
head(gapminder)
# A tibble: 6 × 6
  country     continent  year lifeExp      pop gdpPercap
  <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
1 Afghanistan Asia       1952    28.8  8425333      779.
2 Afghanistan Asia       1957    30.3  9240934      821.
3 Afghanistan Asia       1962    32.0 10267083      853.
4 Afghanistan Asia       1967    34.0 11537966      836.
5 Afghanistan Asia       1972    36.1 13079460      740.
6 Afghanistan Asia       1977    38.4 14880372      786.

Rebuilding plots with plot_ly()

plot_ly() basics

Building plotly objects

layout(
  plot_ly(diamonds, x = ~cut),
  title = "My beatiful histogram"
)
No trace type specified:
  Based on info supplied, a 'histogram' trace seems appropriate.
  Read more about this trace type -> https://plotly.com/r/reference/#histogram
plot_ly(diamonds, x = ~cut) %>% layout(title = "My beatiful histogram")
No trace type specified:
  Based on info supplied, a 'histogram' trace seems appropriate.
  Read more about this trace type -> https://plotly.com/r/reference/#histogram
diamonds %>%
  plot_ly() %>% 
  add_histogram(x = ~cut)

Common plotly plots

Bars and histograms

# preview data
mtcars %>% tibble::rownames_to_column(var = "model")
                 model  mpg cyl  disp  hp drat    wt  qsec vs am gear carb
1            Mazda RX4 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
2        Mazda RX4 Wag 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
3           Datsun 710 22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
4       Hornet 4 Drive 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
5    Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
6              Valiant 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
7           Duster 360 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
8            Merc 240D 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
9             Merc 230 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
10            Merc 280 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
11           Merc 280C 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
12          Merc 450SE 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
13          Merc 450SL 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
14         Merc 450SLC 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
15  Cadillac Fleetwood 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
16 Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
17   Chrysler Imperial 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
18            Fiat 128 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
19         Honda Civic 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
20      Toyota Corolla 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
21       Toyota Corona 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
22    Dodge Challenger 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
23         AMC Javelin 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
24          Camaro Z28 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
25    Pontiac Firebird 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
26           Fiat X1-9 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
27       Porsche 914-2 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
28        Lotus Europa 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
29      Ford Pantera L 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
30        Ferrari Dino 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
31       Maserati Bora 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
32          Volvo 142E 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2

Exercise

  • Using the code chunk below and the gapminder dataset, create a bar graph of the number of countries per continent in only the first year of data collection. Can you create this bar graph two different ways?

CHALLENGE: Polish this plot by sorting by descending frequency, adding data labels on top of the bars, adding an informative title and hiding the legend.

Boxplots and schema()

diamonds %>%
  plot_ly(x = ~price,
          y = ~cut) %>% 
  add_boxplot(boxpoints = FALSE)

Exercise

  • Using the code chunk below and the iris dataset, create comparative boxplots of Sepal.Width for each Species, sorted by descending mean.

Scatterplots

Application

Exercise

  • Using the code chunk below and the iris dataset, create two scatterplots of Sepal.Width by Sepal.Length:

    1. Scatterplot 1: The color of every point is green, and the mouse over info also displays the Species.

    2. Scatterplot 2: Color each point by Species, except we want to the colors to be as follows: setosa = darkgreen, versicolor = green, virginica = grey.

Line plots

data_sun <- data.frame(year = c(1700:1988),
                       sunspots = as.vector(sunspot.year))
head(economics)
# A tibble: 6 × 6
  date         pce    pop psavert uempmed unemploy
  <date>     <dbl>  <dbl>   <dbl>   <dbl>    <dbl>
1 1967-07-01  507. 198712    12.6     4.5     2944
2 1967-08-01  510. 198911    12.6     4.7     2945
3 1967-09-01  516. 199113    11.9     4.6     2958
4 1967-10-01  512. 199311    12.9     4.9     3143
5 1967-11-01  517. 199498    12.8     4.7     3066
6 1967-12-01  525. 199657    11.8     4.8     3018
econ <- economics %>%
  mutate(year = year(date),
         month = month(date))
econ %>% 
  group_by(year) %>% 
  plot_ly(x = ~month,
          y = ~unemploy) %>% 
  add_lines(color = ~ordered(year))

Application

gapminder %>% 
  group_by(country) %>% 
  plot_ly(x = ~year, y = ~lifeExp) %>% 
  add_lines(color = ~continent)
gapminder %>% 
  group_by(country) %>% 
  plot_ly(x = ~year, y = ~lifeExp) %>% 
  add_lines(color = ~continent)

Other types of plotly plots

2D histogram and heatmap

corr <- diamonds %>% 
  select(where(is.numeric)) %>% 
  cor
corrplot::corrplot(corr)
corr %>% 
  data.frame %>% 
  plot_ly(x = rownames(corr), y = colnames(corr), z = corr) %>% 
  add_heatmap(colors = "RdBu") %>% 
  colorbar(limits = c(-1, 1))

Exercise

  • Using the code chunk below, create the following graphs:

    1. An interactive 2D histogram for Petal.Width and Petal.Length from the iris dataset. What other type of plot can we make to display two quantitative variables that may be a better choice for this data?

    2. An interactive heatmap for color by clarity from the diamonds dataset. Note that the best way to do this is to let plotly guess the plot type when supplying two categorical variables to x and y.

Slope graphs and dumbell charts

gapminder %>% 
  filter(year %in% c(min(year), max(year))) %>% 
  summarize(.by = c(continent, year),
           avg_lifeExp = round(mean(lifeExp), 1)) %>% 
  pivot_wider(names_from = year,
              values_from = avg_lifeExp,
              names_prefix = "year_") 
  
  # add plotly code
  
  layout(title = "Gapminder average life expectancy",
         xaxis = list(ticktext = c("1952", "2007"),
                      tickvals = c(1, 2),
                      zeroline = FALSE),
         yaxis = list(title = "",
                      showgrid = FALSE,
                      showticks = FALSE,
                      showticklabels = FALSE))
head(mpg)
# A tibble: 6 × 11
  manufacturer model displ  year   cyl trans      drv     cty   hwy fl    class 
  <chr>        <chr> <dbl> <int> <int> <chr>      <chr> <int> <int> <chr> <chr> 
1 audi         a4      1.8  1999     4 auto(l5)   f        18    29 p     compa…
2 audi         a4      1.8  1999     4 manual(m5) f        21    29 p     compa…
3 audi         a4      2    2008     4 manual(m6) f        20    31 p     compa…
4 audi         a4      2    2008     4 auto(av)   f        21    30 p     compa…
5 audi         a4      2.8  1999     6 auto(l5)   f        16    26 p     compa…
6 audi         a4      2.8  1999     6 manual(m5) f        18    26 p     compa…
# create summary data of mean mpg by model
# then create dumbell chart with segments and points
# -> manually specify color legend
mpg %>% 
  summarize(.by = model,
            across(c(cty, hwy), mean)) %>% 
  mutate(model = fct_reorder(model, cty)) %>% 
  ggplot() +
  geom_segment(aes(x = cty,
                   xend = hwy,
                   y = model,
                   yend = model),
               color = "grey") + 
  geom_point(aes(x = cty,
                 y = model,
                 color = "blue")) + 
  geom_point(aes(x = hwy,
                 y = model,
                 color = "orange")) + 
  scale_color_manual(name = "MPG",
                     values = c("blue", "orange"),
                     labels = c("city", "hwy")) + 
  theme_bw()
mpg %>% 
  summarize(.by = model,
            across(c(cty, hwy), mean)) %>% 
  mutate(model = fct_reorder(model, cty)) 
# A tibble: 38 × 3
   model                cty   hwy
   <fct>              <dbl> <dbl>
 1 a4                  18.9  28.3
 2 a4 quattro          17.1  25.8
 3 a6 quattro          16    24  
 4 c1500 suburban 2wd  12.8  17.8
 5 corvette            15.4  24.8
 6 k1500 tahoe 4wd     12.5  16.2
 7 malibu              18.8  27.6
 8 caravan 2wd         15.8  22.4
 9 dakota pickup 4wd   12.8  17  
10 durango 4wd         11.9  16  
# ℹ 28 more rows

Exercise

  • Using the code chunk below, create the following graphs:

    1. An interactive slopegraph using the mpg dataset for cty vs hwy gas mileage by model. Use the same summarizing code as for the dumbell chart (we need wide summary data). What is a problem we have to consider with this type of plot?

    2. An interactive dumbell plot using the mean lifeExp by continent from the gapminder dataset. Start with the same summarizing code as for the slopegraph (we need wide summary data again). Be sure to order the levels of continent by increasing mean for the minimum year.

Parallel coordinates plot

# create parallel coordinate plot using default options
iris %>% 
  ggparcoord(columns = 1:4, 
             groupColumn = 5,
             scale = "uniminmax",
             order = "anyClass",
             alphaLines = 0.5) +
  theme_bw()
# confirm trends with correlation matrix
cor(select(iris, where(is.numeric))) %>% round(3)
             Sepal.Length Sepal.Width Petal.Length Petal.Width
Sepal.Length        1.000      -0.118        0.872       0.818
Sepal.Width        -0.118       1.000       -0.428      -0.366
Petal.Length        0.872      -0.428        1.000       0.963
Petal.Width         0.818      -0.366        0.963       1.000

Graphical queries

Basic graphical queries

mtcars %>% 
  plot_ly(x = ~wt,
          y = ~mpg) %>% 
  add_markers() %>% 
  add_text(text = ~cyl,
           textposition = "top")

Linked brushing

mtcars %>% 
  highlight_key(~cyl) %>% 
  plot_ly(x = ~wt,
          y = ~mpg) %>% 
  add_markers() %>% 
  add_text(text = ~cyl,
           textposition = "top") %>%
  highlight(on = "plotly_hover")
Setting the `off` event (i.e., 'plotly_doubleclick') to match the `on` event (i.e., 'plotly_hover'). You can change this default via the `highlight()` function.

Application

shared_data <- highlight_key(mpg)

p <- shared_data %>% 
  plot_ly(x = ~displ,
          y = ~hwy) %>% 
  add_markers() %>% 
  highlight(on = "plotly_selected")

bscols(p, datatable(shared_data, height = 500))
Setting the `off` event (i.e., 'plotly_deselect') to match the `on` event (i.e., 'plotly_selected'). You can change this default via the `highlight()` function.
# plot two layers
# -> one of all points with grey color
# -> another with just points of interest in a different color
# -> add legend with informative values
ggplot() + 
  geom_point(aes(x = displ,
                 y = hwy,
                 color = "Other"),
             data = mpg) + 
  geom_point(aes(x = displ,
                 y = hwy,
                 color = "Corvette"),
             data = filter(mpg, model == "corvette")) + 
  scale_color_manual(values = c("Other" = "grey", "Corvette" = "red"),
                     name = "Model") + 
  labs(title = "Fuel economy from 1999 to 2008 for 38 car models",
       caption = "Source: https://fueleconomy.gov/",
       x = "Engine Displacement",
       y = "Miles Per Gallon") + 
  theme_bw() 
ggplot(data = mpg) + 
  geom_point(aes(x = displ,
                 y = hwy)) +
  labs(title = "Fuel economy from 1999 to 2008 for 38 car models",
       caption = "Source: https://fueleconomy.gov/",
       x = "Engine Displacement",
       y = "Miles Per Gallon") + 
  theme_bw()
# show hull with colored points to point out caution when using this technique
ggplot() + 
  geom_point(aes(x = displ,
                 y = hwy),
             data = mpg) + 
  geom_point(aes(x = displ,
                 y = hwy,
                 color = "a4"),
             data = filter(mpg, model == "a4")) + 
  geom_mark_hull(aes(x = displ,
                     y = hwy,
                     filter = model == "a4",
                     label = model),
                 data = mpg) + 
  scale_color_manual(values = c("a4" = "red"),
                     name = "Model") + 
  theme_bw()

More graphical queries

gapminder %>% 
  group_by(country) %>% 
  plot_ly(x = ~year,
          y = ~lifeExp,
          text = ~country) %>% 
  add_lines(color = ~continent)

Exercise

  • Using the code chuck below, explore the ggplot2::msleep data.

    1. Create a linked brushing setup for a scatterplot of brainwt by sleep_total and the corresponding data table. Which points stand out? Which species are they?

    2. CHALLENGE: Recreate the scatterplot as a static image using ggplot2 and add annotations to the interesting species via geom_mark_*() as if it were to be in the final published work. Add nicely formatted, informative labels and titles as well.

Linking multiple plots and subplot()

mtcars %>% 
  plot_ly(x = ~ordered(cyl)) %>% 
  add_histogram()
mtcars %>% 
  plot_ly(x = ~wt,
          y = ~mpg) %>% 
  add_markers() 
p <- plot_ly(diamonds,
             y = ~price,
             color = I("black"), 
             alpha = 0.1 )

p1 <- p %>% add_boxplot(x = "Overall")
p2 <- p %>% add_boxplot(x = ~cut)

subplot(p1, p2)
p1 <- ggplot(data = diamonds,
            aes(x = price,
                color = cut)) + 
  geom_density() + 
  theme_bw()

p2 <- diamonds %>% 
  plot_ly() %>% 
   add_boxplot(x = ~price,
               y = ~cut,
               color = ~cut)
shared_data <- highlight_key(diamonds)

p1 <- ggplot(data = shared_data,
            aes(x = price,
                color = cut)) + 
  geom_density() + 
  theme_bw()

p2 <- shared_data %>% 
  plot_ly() %>% 
   add_boxplot(x = ~price,
               y = ~cut,
               color = ~cut)

subplot(ggplotly(p1), p2,
        nrows = 2,
        shareX = TRUE) %>% 
  highlight(on = "plotly_click")
Warning: Can only have one: config
Setting the `off` event (i.e., 'plotly_doubleclick') to match the `on` event (i.e., 'plotly_click'). You can change this default via the `highlight()` function.

Exercise

  • Using code chuck below and the starter code that filters and summarizes the Lahman::Batting data to team totals for the most current year then creates three density plots, do the following:

    1. CHALLENGE: Create an interactive parallel coordinates. Remember that we need long data for all of the numeric variables and we can group by teamID because that acts as the observation ID. What can we conclude from this plot, if anything?

    2. Combine these plots into a single view with subplot(); however have the three density plots in the first row and the parallel coordinates plot in the second row.

# create team summarized batting data for the most recent year
batting <- Lahman::Batting %>% 
  filter(yearID == max(yearID)) %>% 
  select(-c(stint,G)) %>% 
  summarize(.by = c(teamID, yearID, lgID), across(c(where(is.numeric)), sum)) %>% 
  mutate(yearID = as.factor(yearID)) %>% # so year doesn't get rescaled in the parallel coordinates plot
  select(where(is.factor), HR, RBI, SB) # just look at three important batting stats

# create three different density plots
p1 <- batting %>% 
  ggplot() + 
  geom_density(aes(x = HR,
                   color = lgID)) + 
  theme_bw()
p2 <- batting %>% 
  ggplot() + 
  geom_density(aes(x = RBI,
                   color = lgID)) + 
  theme_bw()
p3 <- batting %>% 
  ggplot() + 
  geom_density(aes(x = SB,
                   color = lgID)) + 
  theme_bw() 

# create parallel coordinates plot

# organize plots

Filter events

Highlight vs filter

Creating a filtered event plot

# create shared data object
shared_data <- highlight_key(txhousing)

# create highlight plot from shared data object
p <- ggplot(data = shared_data) +
  geom_line(aes(x = date,
                y = median,
                group = city))

# arrange select box for filtering shared data object and plot from same shared data object

Exercise

  • Using the code chunk below, modify / add to the code below to transform the static timeseries plot of the gapminder dataset into an interactive filtered event plot.
ggplot(data = gapminder) + 
  geom_line(aes(x = year,
                y = lifeExp,
                group = country,
                color = continent)) + 
  theme_bw()